library(tidyverse)
library(plotly)
library(reshape2)
Attaching package: ‘reshape2’
The following object is masked from ‘package:tidyr’:
smiths
# Load the per-photo EXIF export.
exif <- read_csv(file = "capstone_exif.csv")
Rows: 1116 Columns: 15── Column specification ────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (10): DateTimeOriginal, CreateDate, ModifyDate, Software, LensInfo, LensModel, ExposureTime, FocalLength, FocalLengthIn3...
dbl (5): FlickrID, JFIFVersion, ISO, FNumber, BrightnessValue
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the per-image / per-sub-image color measurements.
img_data <- read_csv(file = "capstone_img_data.csv")
Rows: 10955 Columns: 87── Column specification ────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (16): flickr, img_loc, the_image, crop_coords, center_rgb, post_top_hsl, post_2_hsl, post_3_hsl, post_4_hsl, post_5_hsl,...
dbl (70): using_id, img_width, img_height, do_img_at, sub_img, full_id, r_min, r_max, r_mean, r_mode, g_min, g_max, g_mean, ...
num (1): vivid_count
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# After working with this data for 12 hours, I found hidden characters in it
# that were not displaying in Excel, so the count columns are coerced to
# integer explicitly.
# NOTE(review): this chunk is repeated verbatim further down, after
# `imgsd_tidy` is built from `img_data`; in the visible source `imgsd_tidy`
# is not assigned anywhere before this point -- confirm the chunk order.
imgsd_tidy <- imgsd_tidy %>%
  mutate(
    across(
      all_of(c(
        paste0("post_", c("top", 2:6), "_count"),
        paste0("common_hsl_", 1:4, "_count"),
        # full_/visib_/vivid_ x eight hue buckets
        paste0(
          rep(c("full", "visib", "vivid"), each = 8),
          "_",
          c("red", "orange", "yellow", "green",
            "cyan", "blue", "purple", "mag"),
          "_count"
        ),
        "vivid_count",
        "gen_bright_count",
        "gen_dark_count"
      )),
      as.integer
    )
  ) %>%
  # total_pixels is the sum of the eight full_* hue counts
  mutate(
    total_pixels = full_red_count + full_orange_count + full_yellow_count +
      full_green_count + full_cyan_count + full_blue_count +
      full_purple_count + full_mag_count
  ) %>%
  # more type conversions
  mutate(
    total_pixels = as.integer(total_pixels),
    light_mean_val = as.numeric(light_mean_val),
    post_num_regions = as.integer(post_num_regions),
    across(c(r_mean, g_mean, b_mean), as.numeric)
  )
# Establishing ratio columns: every count is expressed as a share of
# total_pixels. All columns are built in one mutate() call, in the same order
# as before.
# NOTE(review): the `oragne` spelling in three output names is kept as-is so
# existing references to those columns keep working; rename them if nothing
# depends on the misspelling.
imgsd_ratio <- imgsd_tidy %>%
  mutate(
    # posterization regions
    ratio_post_top_hsl = post_top_count / total_pixels,
    ratio_post_2_hsl = post_2_count / total_pixels,
    ratio_post_3_hsl = post_3_count / total_pixels,
    ratio_post_4_hsl = post_4_count / total_pixels,
    ratio_post_5_hsl = post_5_count / total_pixels,
    ratio_post_6_hsl = post_6_count / total_pixels,
    # full hue buckets
    ratio_full_red = full_red_count / total_pixels,
    ratio_full_oragne = full_orange_count / total_pixels,
    ratio_full_yellow = full_yellow_count / total_pixels,
    ratio_full_green = full_green_count / total_pixels,
    ratio_full_cyan = full_cyan_count / total_pixels,
    ratio_full_blue = full_blue_count / total_pixels,
    ratio_full_purple = full_purple_count / total_pixels,
    ratio_full_mag = full_mag_count / total_pixels,
    # visible hue buckets
    ratio_visib_red = visib_red_count / total_pixels,
    ratio_visib_oragne = visib_orange_count / total_pixels,
    ratio_visib_yellow = visib_yellow_count / total_pixels,
    ratio_visib_green = visib_green_count / total_pixels,
    ratio_visib_cyan = visib_cyan_count / total_pixels,
    ratio_visib_blue = visib_blue_count / total_pixels,
    ratio_visib_purple = visib_purple_count / total_pixels,
    ratio_visib_mag = visib_mag_count / total_pixels,
    # vivid hue buckets
    ratio_vivid_red = vivid_red_count / total_pixels,
    ratio_vivid_oragne = vivid_orange_count / total_pixels,
    ratio_vivid_yellow = vivid_yellow_count / total_pixels,
    ratio_vivid_green = vivid_green_count / total_pixels,
    ratio_vivid_cyan = vivid_cyan_count / total_pixels,
    ratio_vivid_blue = vivid_blue_count / total_pixels,
    ratio_vivid_purple = vivid_purple_count / total_pixels,
    ratio_vivid_mag = vivid_mag_count / total_pixels,
    ratio_vivid = vivid_count / total_pixels
  )
# Drop the columns not used in the analysis, then fill missing posterization
# HSL strings (regions 2-6) with the sentinel "(-1, -1, -1)".
imgsd_tidy <- img_data %>%
  select(
    -c(flickr, img_loc, the_image, img_width, img_height,
       crop_coords, do_img_at, r_mode, b_mode, g_mode)
  ) %>%
  replace_na(
    setNames(
      as.list(rep("(-1, -1, -1)", 5)),
      paste0("post_", 2:6, "_hsl")
    )
  )
# Split create_date on the space into a date part and a time part, then split
# the date part on ":" into year / month / day (full_date itself is kept).
# `date` re-bases every photo onto the same placeholder year ("1881") so that
# only month and day vary.
# NOTE(review): 1881 is not a leap year, so a Feb 29 capture date would parse
# to NA here.
exif_tidy <- exif_tidy %>%
  separate(
    create_date,
    into = c("full_date", "time"),
    sep = " ",
    remove = TRUE
  ) %>%
  separate(
    full_date,
    into = c("year", "month", "day"),
    sep = ":",
    remove = FALSE
  ) %>%
  mutate(
    date = as.Date(paste("1881", month, day, sep = "-"), format = "%Y-%m-%d")
  )
# Count rows per using_id and keep only the IDs that have at least 6 rows;
# both data frames are then restricted to those IDs.
subimg_qty <- count(imgsd_tidy, using_id)
good_ids <- subimg_qty %>%
  filter(n >= 6) %>%
  select(using_id)
imgsd_tidy <- imgsd_tidy[imgsd_tidy$using_id %in% good_ids$using_id, ]
exif_tidy <- exif_tidy[exif_tidy$flickr_id %in% good_ids$using_id, ]
# After working with this data for 12 hours, I found hidden characters in it
# that were not displaying in Excel, so the count columns are coerced to
# integer explicitly.
imgsd_tidy <- imgsd_tidy %>%
  mutate(
    across(
      all_of(c(
        paste0("post_", c("top", 2:6), "_count"),
        paste0("common_hsl_", 1:4, "_count"),
        # full_/visib_/vivid_ x eight hue buckets
        paste0(
          rep(c("full", "visib", "vivid"), each = 8),
          "_",
          c("red", "orange", "yellow", "green",
            "cyan", "blue", "purple", "mag"),
          "_count"
        ),
        "vivid_count",
        "gen_bright_count",
        "gen_dark_count"
      )),
      as.integer
    )
  ) %>%
  # total_pixels is the sum of the eight full_* hue counts
  mutate(
    total_pixels = full_red_count + full_orange_count + full_yellow_count +
      full_green_count + full_cyan_count + full_blue_count +
      full_purple_count + full_mag_count
  ) %>%
  # more type conversions
  mutate(
    total_pixels = as.integer(total_pixels),
    light_mean_val = as.numeric(light_mean_val),
    post_num_regions = as.integer(post_num_regions),
    across(c(r_mean, g_mean, b_mean), as.numeric)
  )
# Establishing ratio columns: every count is expressed as a share of
# total_pixels. All columns are built in one mutate() call, in the same order
# as before.
# NOTE(review): the `oragne` spelling in three output names is kept as-is so
# existing references to those columns keep working; rename them if nothing
# depends on the misspelling.
imgsd_ratio <- imgsd_tidy %>%
  mutate(
    # posterization regions
    ratio_post_top_hsl = post_top_count / total_pixels,
    ratio_post_2_hsl = post_2_count / total_pixels,
    ratio_post_3_hsl = post_3_count / total_pixels,
    ratio_post_4_hsl = post_4_count / total_pixels,
    ratio_post_5_hsl = post_5_count / total_pixels,
    ratio_post_6_hsl = post_6_count / total_pixels,
    # full hue buckets
    ratio_full_red = full_red_count / total_pixels,
    ratio_full_oragne = full_orange_count / total_pixels,
    ratio_full_yellow = full_yellow_count / total_pixels,
    ratio_full_green = full_green_count / total_pixels,
    ratio_full_cyan = full_cyan_count / total_pixels,
    ratio_full_blue = full_blue_count / total_pixels,
    ratio_full_purple = full_purple_count / total_pixels,
    ratio_full_mag = full_mag_count / total_pixels,
    # visible hue buckets
    ratio_visib_red = visib_red_count / total_pixels,
    ratio_visib_oragne = visib_orange_count / total_pixels,
    ratio_visib_yellow = visib_yellow_count / total_pixels,
    ratio_visib_green = visib_green_count / total_pixels,
    ratio_visib_cyan = visib_cyan_count / total_pixels,
    ratio_visib_blue = visib_blue_count / total_pixels,
    ratio_visib_purple = visib_purple_count / total_pixels,
    ratio_visib_mag = visib_mag_count / total_pixels,
    # vivid hue buckets
    ratio_vivid_red = vivid_red_count / total_pixels,
    ratio_vivid_oragne = vivid_orange_count / total_pixels,
    ratio_vivid_yellow = vivid_yellow_count / total_pixels,
    ratio_vivid_green = vivid_green_count / total_pixels,
    ratio_vivid_cyan = vivid_cyan_count / total_pixels,
    ratio_vivid_blue = vivid_blue_count / total_pixels,
    ratio_vivid_purple = vivid_purple_count / total_pixels,
    ratio_vivid_mag = vivid_mag_count / total_pixels,
    ratio_vivid = vivid_count / total_pixels
  )
Due to a very large number of images that have fewer than 2% vivid pixels, Vivid Ratios are separated out into three segments: 0% vivid pixels, 0-1% vivid pixels, and more than 1% vivid pixels
# Bucket the vivid-pixel ratio into three ranks:
#   0 = exactly zero, 1 = above 0 but below 1%, 2 = 1% or more.
vivid_ratio <- imgsd_ratio %>%
  select(ratio_vivid) %>%
  mutate(
    ratio_vivid = as.numeric(ratio_vivid),
    ratio_rank = case_when(
      ratio_vivid == 0 ~ 0,
      ratio_vivid > 0 & ratio_vivid < 0.01 ~ 1,
      ratio_vivid >= 0.01 ~ 2
    )
  )
# Pie chart of how many rows fall in each ratio_rank. Legend key:
# 0 -> No Vivid Pixels
# 1 -> 0-1% vivid pixels
# 2 -> more than 1% vivid pixels
rr_counts <- vivid_ratio %>%
  count(ratio_rank)
vrr_pie <- rr_counts %>%
  plot_ly(labels = ~factor(ratio_rank), values = ~n, type = "pie")
vrr_pie
Fewer vivid pixels than expected! Way fewer! Only 359 images/subimages had more than 1% of pixels with more than 70% saturation and lightness between 40 and 70%.
Now let’s get back to seeing the distribution of those 359 values:
# Histogram of the vivid ratio, restricted to the top rank (ratio_rank 2).
vivid_hist <- filter(vivid_ratio, ratio_rank == 2)
fig <- vivid_hist %>%
  plot_ly(x = ~ratio_vivid, type = "histogram")
fig
Future investigation will perform these calculations on the main dataframe in order to access specific image IDs and evaluate the subjective qualities of the top vivid images.
Investigating the distribution of how many posterization “clumps” were in each image
# First setting up filters for more useful faceting going forward:
# sub_img 0 on its own, then sub-images 1-3, 4-6 and 7-9 in groups of three.
r_fig_filter_0 <- imgsd_ratio %>% filter(sub_img == 0)
r_fig_filter_a <- imgsd_ratio %>% filter(sub_img %in% c(1, 2, 3))
r_fig_filter_b <- imgsd_ratio %>% filter(sub_img %in% c(4, 5, 6))
r_fig_filter_c <- imgsd_ratio %>% filter(sub_img %in% c(7, 8, 9))
# Histograms of post_num_regions: sub_img 0 at binwidth 5 and 50, and each
# group of three sub-images at binwidth 5, faceted by sub_img.
fig_0 <- r_fig_filter_0 %>%
  ggplot(aes(x = post_num_regions)) +
  geom_histogram(binwidth = 5)

fig_a <- r_fig_filter_a %>%
  ggplot(aes(x = post_num_regions)) +
  geom_histogram(binwidth = 5) +
  facet_grid(cols = vars(sub_img))

fig_b <- r_fig_filter_b %>%
  ggplot(aes(x = post_num_regions)) +
  geom_histogram(binwidth = 5) +
  facet_grid(cols = vars(sub_img))

fig_c <- r_fig_filter_c %>%
  ggplot(aes(x = post_num_regions)) +
  geom_histogram(binwidth = 5) +
  facet_grid(cols = vars(sub_img))

# Same data as fig_0 with a 10x wider bin.
fig_0b <- r_fig_filter_0 %>%
  ggplot(aes(x = post_num_regions)) +
  geom_histogram(binwidth = 50)

fig_0
fig_0b
fig_a
fig_b
fig_c
No huge conclusions to draw about the distribution in sub_image 0, other than the x-scale is shown as roughly 10x (slightly less) than the subimages. Also mildly interesting (also on sub_img 0), the max count for any given bin (at bin-width 5), is a fraction of the scale of the other sub_images. Bumping the sub_img 0 binwidth up to 50 (10x), brings it more in line with the counts in the sub_images, but the long tail on sub_img 0 keeps it from matching exactly.
Otherwise, the only noteworthy observation is the less-intense right skew on sub_images 2, 5, and 8.
These sub-images represent the horizontal middle third of their source images, mimicking the artistic principles of the rule of thirds and putting focal points near the center of the image.
EXIF data, I have not forsaken you.
# Distribution of the EXIF brightness_value field (250 bins).
fig <- exif_tidy %>%
  ggplot(aes(x = brightness_value)) +
  geom_histogram(bins = 250)
fig
That’s the most normal distribution we’ve seen yet! But still pretty irregular. Let’s see how it compares to the HSL data
# Histograms of light_mean_val (250 bins): all rows, sub_img 0 alone, and each
# group of three sub-images faceted by sub_img.
fig_all <- imgsd_ratio %>%
  ggplot(aes(x = light_mean_val)) +
  geom_histogram(bins = 250)

fig_0 <- r_fig_filter_0 %>%
  ggplot(aes(x = light_mean_val)) +
  geom_histogram(bins = 250)

fig_a <- r_fig_filter_a %>%
  ggplot(aes(x = light_mean_val)) +
  geom_histogram(bins = 250) +
  facet_grid(cols = vars(sub_img))

fig_b <- r_fig_filter_b %>%
  ggplot(aes(x = light_mean_val)) +
  geom_histogram(bins = 250) +
  facet_grid(cols = vars(sub_img))

fig_c <- r_fig_filter_c %>%
  ggplot(aes(x = light_mean_val)) +
  geom_histogram(bins = 250) +
  facet_grid(cols = vars(sub_img))

fig_all
fig_0
fig_a
fig_b
fig_c
Observations: Variation among sub-images is fairly minor, with some possibility that corner quadrants (1, 3, 7, 9) have wider distributions overall. Lightness values processed for this dataset are centered around 0.625 with a slight left skew. While the EXIF data has a less-normal distribution, it does seem to have a center slightly to the right of its baseline value, but the irregularity of the shape looks more like a right skew than left.
# Overlaid, semi-transparent histograms of the mean red, green and blue values.
fig <- imgsd_ratio %>%
  plot_ly(alpha = .4) %>%
  add_histogram(x = ~r_mean) %>%
  add_histogram(x = ~g_mean) %>%
  add_histogram(x = ~b_mean) %>%
  layout(barmode = "overlay", colorway = c("red", "green", "blue"))
fig
Observation: No special information here. As expected, it looks a lot like the mean lightness value distribution. Almost as if r+g+b = light….
# Histogram by day-of-year (`date`, which shares one placeholder year) and by
# the original capture date string (`full_date`).
fig <- exif_tidy %>%
  plot_ly(x = ~date, type = "histogram")
fig_all <- exif_tidy %>%
  plot_ly(x = ~full_date, type = "histogram")
fig
fig_all
Observation: Even when adjusting for year, the distribution of images is highly irregular. As a month, August is over-represented, and as an individual date, Will and Taylor’s wedding is over-represented.
# Parse the "HH:MM:SS" time strings into date-times and plot their distribution.
#
# Two fixes relative to the previous version:
#   * the format string was "%H:%M%S" (missing the colon before the seconds),
#     which does not match "HH:MM:SS" input and yields NA;
#   * strptime() returns POSIXlt, a list-based class that is a poor fit for a
#     data-frame column. POSIXct is stored instead.
#     NOTE(review): the POSIXlt column is the likely cause of the
#     "C stack usage ... too close to the limit" error seen when plotting --
#     confirm on a re-run.
# Only a clock time is supplied, so the date part of each value defaults to the
# current date; the time zone is pinned to UTC so results do not depend on the
# machine's locale.
exif_tidy$time <- as.POSIXct(exif_tidy$time, format = "%H:%M:%S", tz = "UTC")
fig <- plot_ly(exif_tidy, x = ~time, type = "histogram")
fig
Error: C stack usage 15924272 is too close to the limit
Final data prep!
Slimming down the EXIF data frame….
# Slimming down exif columns: keep only the fields carried into the main frame.
simple_exif <- select(
  exif_tidy,
  flickr_id, date, month, time, software, jfifversion, brightness_value
)
… and adding those EXIF columns to the main dataframe
# Attach the slimmed-down EXIF columns to every image / sub-image row.
#
# Both join keys are coerced to character so they share a type: previously only
# flickr_id was converted, which leaves the keys mismatched whenever using_id
# is still numeric (dplyr refuses to join character to double).
simple_exif <- simple_exif %>%
  mutate(flickr_id = as.character(flickr_id))
# bringing it all full circle with this variable choice
# The many-to-many fan-out is expected here, so it is declared explicitly
# rather than left to surface as a warning.
# NOTE(review): this implies some flickr_id values occur more than once in the
# EXIF data, which duplicates the matching image rows -- confirm that is
# intended before relying on row counts.
all_img_data <- imgsd_ratio %>%
  mutate(using_id = as.character(using_id)) %>%
  left_join(
    simple_exif,
    by = c("using_id" = "flickr_id"),
    relationship = "many-to-many"
  )
Warning: Detected an unexpected many-to-many relationship between `x` and `y`.
#this throws a warning about many-to-many relationships, but this is expected behavior
This space reserved for re-creating sub-frames for ‘faceting’
At last, let’s revisit those hypotheses
\(H_0:\) There is no correlation between time of year and color values
\(H_a:\) Warm color values are more prominent between May and September
Gut Check - Due to the uneven distribution of sample images over time, I don’t have a strong expectation of valid results.
# Tag each EXIF row as season 1 (months 5-9) or 0 (everything else, including
# a missing month), then tabulate how many rows fall in each season.
exif_tidy <- exif_tidy %>%
  mutate(month = as.integer(month))
season_split <- exif_tidy %>%
  mutate(season = if_else(month %in% 5:9, 1, 0))
season_counts <- table(season_split$season)
season_data <- data.frame(
  ratio = names(season_counts),
  count = as.vector(season_counts)
)
season_data
On the other hand, summer values (622) don’t outrageously outnumber non-summer values.
Now to collect the hue information we will be comparing
# Visible-pixel totals per temperature group:
#   warm = red + orange + yellow + magenta
#   cool = green + cyan + blue + purple
all_img_data <- all_img_data %>%
  mutate(
    visib_warm = visib_red_count + visib_orange_count +
      visib_yellow_count + visib_mag_count,
    visib_cool = visib_green_count + visib_cyan_count +
      visib_blue_count + visib_purple_count
  )
# Frame for hypothesis 1: warm / cool shares of total pixels per row, plus a
# season flag (1 = months 5-9, 0 = everything else, including rows with no
# month), followed by a Welch two-sample t-test of warm_ratio by season.
#
# `month` reaches all_img_data through simple_exif, which is built before
# exif_tidy$month is converted to integer, so it can still be a zero-padded
# string such as "05". Comparing that string with `>= 5` is a text comparison
# and never matches, so the month is coerced to integer inside the season rule.
# The `month` column itself is left untouched.
h1_frame <- all_img_data %>%
  select(
    using_id, sub_img, total_pixels, visib_warm, visib_cool, date, month
  ) %>%
  mutate(
    warm_ratio = visib_warm / total_pixels,
    cool_ratio = visib_cool / total_pixels,
    season = case_when(as.integer(month) %in% 5:9 ~ 1, TRUE ~ 0),
    ratio_diff = warm_ratio - cool_ratio
  )
t.test(warm_ratio ~ season, data = h1_frame)
Welch Two Sample t-test
data: warm_ratio by season
t = 11.179, df = 9078.6, p-value < 2.2e-16
alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
95 percent confidence interval:
0.01619583 0.02308347
sample estimates:
mean in group 0 mean in group 1
0.04115762 0.02151798
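Despite my hesitation, the results of the t-test are strongly in favor of rejecting the null hypothesis that there is no difference in visible warmth throughout the year: the large t-value (11.179) and very small p-value (< 2.2e-16) both support this conclusion. Note the direction of the difference, however: the mean warm ratio is higher in group 0 (October–April, 0.0412) than in group 1 (May–September, 0.0215), so the effect runs opposite to the alternative hypothesis that warm colors are more prominent between May and September.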
Additional Hypotheses:
\(H_0:\) There is no correlation between time of day and lightness values
\(H_a:\) Lightness values are higher between 6 am and 6 pm
\(H_0:\) There is no correlation between saturation and being a picture of my cat
\(H_a:\) Low saturation values are increasingly common over time, especially in central sub-images
\(H_0:\) Vivid ratio (percentage of vivid pixels) is uniformly distributed among all Software types
\(H_a:\) Vivid ratio is consistently highest in Slow Shutter Cam photos without JFIF values